ddPCR HIV RNA Processing

Author

Antoine Chaillon

Published

February 26, 2026

Equations

\[ \text{1. cDNA Synthesis Concentration: } C_{cDNA} = \frac{C_{RNA} \times V_{RNA\_in\_cDNA}}{V_{Total\_cDNA\_rxn}} \]

\[ \text{2. cDNA Mass per Well: } Mass_{well} = \left( C_{cDNA} \times \frac{V_{cDNA\_in\_PCR}}{V_{PCR\_rxn}} \right) \times V_{PCR\_rxn} \]

\[ \text{3. Final Normalization: } \frac{Copies}{1000ng} = \frac{Copies_{well}}{Mass_{well}} \times 1000 \]

Assay Parameters (to adjust)

Table 1: Assay setup parameters and volumes.

Description

Symbol

Value

Unit

RNA volume used in cDNA synthesis

VOL_RNA_IN_CDNA

15

µL

Total volume of cDNA synthesis reaction

TOTAL_VOL_CDNA

20

µL

cDNA volume used in ddPCR reaction

VOL_CDNA_IN_PCR

8

µL

Total volume of ddPCR reaction

VOL_PCR_RXN

20

µL

Load Map Files

Show the R code
# --- Define Paths ---
map_dir <- "/Users/antoinechaillon/Dropbox/WITHINHOST/_codes/ddPCR/data/2026-02-25/"
map_files <- list.files(map_dir, pattern = "*.xlsx", full.names = TRUE)
# map_files
# 



map_df1 <- map_files %>%
  # 0. Name the vector so map_df can create an ID column from the file paths
  set_names() %>% 
  map_df(~read_xlsx(.x, sheet = " cDNA synthesis calculation", skip = 1), .id = "source_file") %>%
  
  # 1. Handle filters
  filter(!is.na(`Patient ID`)) %>%
  
  # 2. Extract Metadata from filename & Recode
  mutate(
    # Extracts the first part (e.g., BR-2967)
    run_id = str_split(basename(source_file), " ", simplify = TRUE)[,1],
    
    # Extracts the date at the end (e.g., 12-16-2024) and converts to Date object
    # This regex looks for MM-DD-YYYY before the .xlsx extension
    run_date = str_extract(basename(source_file), "\\d{1,2}-\\d{1,2}-\\d{4}") %>% 
                mdy(),
    
    `ddPCR ID#`    = as.character(`ddPCR #`),
    RNA_vol_ul     = `Sample Volume for RNA synthesis (ul)`,
    RNA_conc_ng_ul = `RNA conc (ng/ul) Qubit`,
    
    `DNA template used` = (1000 / 7.1) * RNA_vol_ul / RNA_conc_ng_ul
  ) %>%
  
  # 3. Select and Standardize
  select(
    run_id,
    run_date,
    `ddPCR ID#`, 
    GlobalSpecimenID  = `Patient ID`, 
    RNA_vol_ul, 
    RNA_conc_ng_ul,
    run = `ddPCR #`
  )


# dput(unique(sort(map_df1$GlobalSpecimenID)))

rna_raw <- readRDS(paste0("/Users/antoinechaillon/Dropbox/WITHINHOST/_codes/ddPCR/outputs/rna_raw_unlabeled.RDS"))
# glimpse(rna_raw)
map_df2 <- rna_raw|>dplyr::select(rna_ddpcr_runid,rna_ddpcr_id,pid,rna_spec_abbr,collect_date)|>
        dplyr::rename(run=rna_ddpcr_runid,
                      `GlobalSpecimenID`=rna_ddpcr_id,
                      acronyms=rna_spec_abbr,
                      sampling_date=collect_date)
map_df <- map_df2|>dplyr::left_join(map_df1|>dplyr::select(-run),by=c("GlobalSpecimenID" ="GlobalSpecimenID","run"="run_id"))|>
        dplyr::filter(!is.na(RNA_vol_ul))

num_cols <- names(map_df)[sapply(map_df, is.numeric)]
datatable(
  map_df,
  caption = 'Mapping Samples',
  filter = 'top',
  options = list(pageLength = 10, autoWidth = TRUE, scrollX = TRUE),
  rownames = FALSE
) %>% 
  formatRound(columns = num_cols, digits = 2)

Process ddPCR Raw Data

Show the R code
ddpcr_path<- "/Users/antoinechaillon/Dropbox/WITHINHOST/_codes/ddPCR/data/2026-02-25/"
ddpcr_files <- list.files(ddpcr_path, pattern = ".xlsx", full.names = TRUE)



# ---  Processing Function ---
process_ddpcr <- function(file_path) {
  # Extract Run ID from filename
  run_id <- str_split( basename(file_path)," ",simplify = T)[,1]
  
  read_xlsx(file_path,sheet = "raw data") %>%
          dplyr::rename(Concentration=`Concentration (copies/ul)`)|>
    # Use the column names confirmed by your dput 
    select(Sample, Target, `Concentration`, Status) %>%
    mutate(
      # FIX: Force Concentration to numeric to prevent the 'bind_rows' error
      Concentration = as.numeric(Concentration), 
      Sample = as.character(Sample)
    ) %>%
    # Filter out non-numeric samples and keep only valid statuses 
    # filter(grepl("SW",Sample), is.na(Status)) %>% 
    filter(grepl(paste(map_df$GlobalSpecimenID,collapse = "|"),Sample), is.na(Status)) %>% 

              # Pivot so targets become columns
    pivot_wider(id_cols = Sample, names_from = Target, values_from = Concentration) %>%
    mutate(RunID = run_id)|>
          dplyr::rename(`ms tat Rev FAM`=`ms tat REV_FAM`,
                        `Gag HEX`=GAG_HEX)
}
     
    # msTatRev_well = `ms tat Rev FAM` * VOL_PCR_RXN,
    # skGag_well    = `Gag HEX` * VOL_PCR_RXN,
ddpcr_data <- ddpcr_files %>%
  map_df(~process_ddpcr(.x), .id = "runnum") %>%
  mutate(`ddPCR ID#` = paste0(runnum, "-", Sample))
datatable(
  ddpcr_data|>dplyr::select(RunID,runnum,Sample, `ddPCR ID#`,
                            `ms tat Rev FAM`, `Gag HEX`),
  caption = 'Preprocessing Results',
  filter = 'top',
  options = list(pageLength = 10, autoWidth = TRUE, scrollX = TRUE),
  rownames = FALSE
) %>%
  formatRound(columns = 4:5, digits = 2) %>%
  formatStyle(
    c('ms tat Rev FAM', 'Gag HEX'),
    backgroundColor = '#f0f8ff',
    fontWeight = 'bold'
  )

Join and Calculate

Show the R code
# Using the ddpcr_data tibble you just generated
final_results <- map_df %>%
  inner_join(ddpcr_data, by = c("GlobalSpecimenID"="Sample","run"="RunID")) %>%
        dplyr::left_join(mymap_df|>dplyr::select(acronyms,location),by="acronyms")|>
        dplyr::relocate(location,.after = acronyms)|>
  mutate(
    # cDNA Synthesis Calculation
    # cDNA concentration = (RNA concentration * RNA volume) / total synthesis volume
    cDNA_conc_ul = (RNA_conc_ng_ul * VOL_RNA_IN_CDNA) / TOTAL_VOL_CDNA,
    
    # Concentration in the ddPCR reaction
    cDNA_conc_ddpcr = (cDNA_conc_ul * VOL_CDNA_IN_PCR) / VOL_PCR_RXN,
    
    # Mass of cDNA per well
    ng_well_cDNA = cDNA_conc_ddpcr * VOL_PCR_RXN,
    
    # Absolute copies per well
    msTatRev_well = `ms tat Rev FAM` * VOL_PCR_RXN,
    skGag_well    = `Gag HEX` * VOL_PCR_RXN,
    
    # Final Normalization to 1000ng
    msTatRev_1000ng = (msTatRev_well / ng_well_cDNA) * 1000,
    skGag_1000ng    = (skGag_well / ng_well_cDNA) * 1000
  )

Final Results

Show the R code
dt_display <- final_results %>%
  select(
    run,GlobalSpecimenID, pid, location,
    `RNA Input (ng/uL)` = RNA_conc_ng_ul,
    `cDNA Stock (ng/uL)` = cDNA_conc_ul,
    `cDNA in Well (ng)` = ng_well_cDNA,
    `msTatRev (cp/uL)` = `ms tat Rev FAM`,
    `skGag (cp/uL)` = `Gag HEX`,
    `msTatRev / 1000ng` = msTatRev_1000ng,
    `skGag / 1000ng` = skGag_1000ng
  )




datatable(
  dt_display,
  caption = 'Final Processed Results (Use buttons below to download)',
  filter = 'top',
  extensions = 'Buttons',
  options = list(
    dom = 'Bfrtip',      
    buttons = list(
      list(extend = 'copy', title = NULL),
      list(extend = 'csv',  title = paste0("ddPCR_Results_", Sys.Date())),
      list(extend = 'excel', title = paste0("ddPCR_Results_", Sys.Date())),
      list(extend = 'pdf',   title = paste0("ddPCR_Results_", Sys.Date()))
    ),
    pageLength = 10, 
    autoWidth = TRUE, 
    scrollX = TRUE
  ),
  rownames = FALSE
) %>%
  formatRound(columns = 5:11, digits = 2) %>%
  formatStyle(
    c('msTatRev / 1000ng', 'skGag / 1000ng'),
    backgroundColor = '#f0f8ff',
    fontWeight = 'bold'
  )

Analyses of Replicates

Data processing

Show the R code
process_ddpcr_diagnostics <- function(file_path) {
  run_id <- str_split(basename(file_path), " ", simplify = TRUE)[, 1]
  
  read_xlsx(file_path, sheet = "raw data") %>%
    dplyr::rename(Concentration = `Concentration (copies/ul)`) %>%
    select(Sample, Target, Concentration, Status) %>%
    mutate(
      Concentration = as.numeric(Concentration),
      Sample = as.character(Sample)
    ) %>%
    # Filter for individual manual replicates
    filter(
      grepl(paste(map_df$GlobalSpecimenID, collapse = "|"), Sample), 
      Status == "Manual"
    ) %>%
    group_by(Sample, Target) %>%
    summarise(
      n_pos_reps = sum(Concentration > 0, na.rm = TRUE),
      mean_conc  = mean(Concentration, na.rm = TRUE),
      sd_conc    = sd(Concentration, na.rm = TRUE),
      # CV% = (SD / Mean) * 100
      cv_percent = (sd_conc / mean_conc) * 100,
      .groups = "drop"
    ) %>%
    mutate(
      RunID = run_id,
      # Flag suspicious results: only 1/3 positive or high CV%
      reliability = case_when(
        n_pos_reps == 3 ~ "High",
        n_pos_reps == 2 ~ "Moderate",
        n_pos_reps == 1 ~ "Low (Sporadic)",
        TRUE ~ "Negative"
      )
    )
}

# Apply to all files
diagnostic_data <- ddpcr_files %>%
  map_df(~process_ddpcr_diagnostics(.x))


final_diagnosis_pos <- map_df %>%
        dplyr::select(run,GlobalSpecimenID,pid,acronyms)|>
  inner_join(diagnostic_data, by = c("GlobalSpecimenID"="Sample","run"="RunID")) %>%
        dplyr::left_join(mymap_df|>dplyr::select(acronyms,location),by="acronyms")|>
        dplyr::relocate(location,.after = acronyms)|>
        dplyr::mutate(n_target_pos=sum(n_pos_reps>0),.by = c(GlobalSpecimenID))|>
        dplyr::mutate(group=case_when(n_target_pos==0~"Double Negative",
                                      n_target_pos==1~"Single Positive",
                                      n_target_pos==2~"Double Positive",
                                      .default = NA_character_))|>
        dplyr::filter(!group=="Double Negative")|>
        dplyr::filter(n_pos_reps>0)|>
        dplyr::mutate(status=case_when(pid%in%c("LG300","LG301","LG302","LG303")~"PWoH",
                                       .default = "PWH"))

num_cols <- names(final_diagnosis_pos)[sapply(final_diagnosis_pos, is.numeric)]


datatable(
  final_diagnosis_pos,
  caption = 'Data with replicates',
  filter = 'top',
  # extensions = 'Buttons',
  options = list(
    dom = 'Bfrtip',      
    buttons = list(
      list(extend = 'copy', title = NULL),
      list(extend = 'csv',  title = paste0("ddPCR_Results_", Sys.Date())),
      list(extend = 'excel', title = paste0("ddPCR_Results_", Sys.Date())),
      list(extend = 'pdf',   title = paste0("ddPCR_Results_", Sys.Date()))
    ),
    pageLength = 10, 
    autoWidth = TRUE, 
    scrollX = TRUE
  ),
  rownames = FALSE
) %>%
  formatRound(columns = num_cols, digits = 2) 
Show the R code
  # formatStyle(
  #   c('Mean % CV across replicates', 'Perc. with 1/3 positive only (sporadic)'),
  #   backgroundColor = '#f0f8ff',
  #   fontWeight = 'bold'
  # )

Diagnosis Summary

Show the R code
diag_display <- final_diagnosis_pos %>%
  group_by(status,group,Target) %>%
  summarise(
    Total_Pos_Samples = n(),
    `Median number of positive replicate` = median(n_pos_reps),
    `Mean % CV across replicates` = mean(cv_percent, na.rm = TRUE),
    `Perc. with 1/3 positive only (sporadic)` = mean(n_pos_reps == 1) * 100 # % of samples with only 1/3 pos
  )


datatable(
  diag_display,
  caption = 'Diagnosis Summary',
  filter = 'top',
  # extensions = 'Buttons',
  options = list(
    dom = 'Bfrtip',      
    # buttons = list(
    #   list(extend = 'copy', title = NULL),
    #   list(extend = 'csv',  title = paste0("ddPCR_Results_", Sys.Date())),
    #   list(extend = 'excel', title = paste0("ddPCR_Results_", Sys.Date())),
    #   list(extend = 'pdf',   title = paste0("ddPCR_Results_", Sys.Date()))
    # ),
    pageLength = 10, 
    autoWidth = TRUE, 
    scrollX = TRUE
  ),
  rownames = FALSE
) %>%
  formatRound(columns = 6:7, digits = 2) %>%
  formatStyle(
    c('Mean % CV across replicates', 'Perc. with 1/3 positive only (sporadic)'),
    backgroundColor = '#f0f8ff',
    fontWeight = 'bold'
  )

Visualizations

Coefficients of Variation across Replicates

Show the R code
ggplot(final_diagnosis_pos, aes(x = as.factor(n_pos_reps), y = cv_percent, color = status)) +
  geom_boxplot(alpha = 0.3) +
  geom_jitter(position = position_jitterdodge(), alpha = 0.6) +
  geom_hline(yintercept = 173.2, linetype = "dashed", color = "red") +
  annotate("text", x = 1.5, y = 180, label = "Theoretical Limit (1/3 pos)", color = "red") +
  labs(
    title = "Variance Analysis: PWH vs PWoH",
    x = "Number of Positive Replicates (out of 3)",
    y = "CV (%)",
    subtitle = "CV% near 173% indicates stochastic/single-well detection"
  ) +
  theme_minimal()

Show the R code
# Create a contingency table of robustness
# table_robust <- table(final_diagnosis_pos$status, final_diagnosis_pos$n_pos_reps == 3)
# 
# # Fisher's Exact Test: Is PWH more likely to have 3/3 replicates than PWoH?
# fisher.test(table_robust)

Detection Threshold?

I am calculating the 95th percentile of the concentration found in the PWoH group. Any PWH sample with a concentration above this value can be considered a “True Positive” with 95% confidence.

Show the R code
# --- Calculate the Noise Threshold per Target ---
thresholds <- final_diagnosis_pos %>%
  filter(status == "PWoH") %>%
  group_by(Target) %>%
  summarise(
    # Use 95th percentile of the mean concentration in negatives
    limit_of_noise = quantile(mean_conc, 0.95, na.rm = TRUE),
    max_noise = max(mean_conc, na.rm = TRUE),
    n_controls = n()
  )

# print(thresholds)


ggplot(final_diagnosis_pos, aes(x = status, y = mean_conc, color = status)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.2) +
  geom_jitter(width = 0.2, alpha = 0.6) +
  # Add the threshold line (assuming Gag_HEX for this example)
  geom_hline(data = thresholds, aes(yintercept = limit_of_noise), 
             linetype = "dashed", color = "red") +
  facet_wrap(~Target, scales = "free_y") +
  scale_y_log10() + # Use log scale because ddPCR data spans orders of magnitude
  labs(
    title = "Defining the Assay Detection Threshold",
    subtitle = "Red line = 95th percentile of PWoH (Noise Floor)",
    y = "Concentration (copies/µL)",
    x = "Group"
  ) +
  theme_minimal()